# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
import re

import scrapy
from dbutils.pooled_db import PooledDB
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql
from poetry.items import CipaiItem, PoetryItem, AuthorItem, BookCateItem, BookItem, BookContent
from pymongo import MongoClient
from scrapy.pipelines.images import ImagesPipeline

from scrapy.exceptions import DropItem

# Module-wide MongoDB client and the "aqie" database handle used by the
# book-related branches of the pipeline below.
client = MongoClient("mongodb://localhost:27017/")
mdb = client["aqie"]

# Keyword arguments expanded into PooledDB(pymysql, **db_dict) by
# PoetryPipeline.__init__.
db_dict = dict(
    host="127.0.0.1",
    port=3306,
    user="root",
    passwd="123456",
    db="blade2",
    maxcached=200,  # maximum number of idle connections
    charset="utf8",
    setsession=['SET AUTOCOMMIT = 1'],
    connect_timeout=60,
    read_timeout=60,
    write_timeout=60,
)


class PoetryPipeline:
    """Persist scraped items to MySQL, MongoDB and local text files.

    ``CipaiItem``, ``PoetryItem`` and ``AuthorItem`` are inserted into MySQL
    through one pooled connection held for the lifetime of the pipeline.
    ``BookCateItem``, ``BookItem`` and ``BookContent`` are stored in the
    module-level MongoDB database ``mdb``; book text is additionally written
    to files below ``../books/``.
    """

    # Directory (relative to the process working directory) for book files.
    _BOOKS_DIR = '../books/'

    _CIPAI_SQL = "insert into poem_cipai(title,url) value(%s,%s)"
    _POEM_SQL = (
        "insert into poem(url,title,dynasty,author,content,annotation,tag, relation, images) "
        "value(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    _AUTHOR_SQL = (
        "insert into poem_author(author,dynasty,introduction,magnum_opus,url, images) "
        "values(%s,%s,%s,%s,%s,%s)"
    )

    def __init__(self):
        """Create the MySQL pool and take one connection and cursor from it."""
        self.mysql_pool_list = PooledDB(pymysql, **db_dict)
        self.conn = self.mysql_pool_list.connection()
        self.cursor = self.conn.cursor()

    def close_spider(self, spider):
        """Release the MySQL cursor and connection when the spider closes."""
        print('----------关闭数据库资源-----------')
        try:
            self.cursor.close()
        finally:
            # Hand the connection back even if closing the cursor failed.
            self.conn.close()

    def process_item(self, item, spider):
        """Store *item* according to its type and pass it on.

        The item is always returned, including for book items and for item
        types this pipeline does not handle, so that any later pipeline still
        receives it.

        Raises:
            pymysql.Error: for failed ``CipaiItem``/``PoetryItem`` inserts
                (``AuthorItem`` insert errors are printed and swallowed).
            KeyError: when a field without a default is missing from *item*.
        """
        print('type', type(item))
        if isinstance(item, CipaiItem):
            self._save_cipai(item)
        elif isinstance(item, PoetryItem):
            self._save_poetry(item)
        elif isinstance(item, AuthorItem):
            self._save_author(item)
        elif isinstance(item, BookCateItem):
            self._save_book_cate(item)
        elif isinstance(item, BookItem):
            self._save_book(item)
        elif isinstance(item, BookContent):
            self._save_book_content(item)
        return item

    @staticmethod
    def _fill_missing(item, fields):
        """Set every field in *fields* that *item* lacks to an empty string."""
        for field in fields:
            if field not in item:
                item[field] = ''

    @staticmethod
    def _join_image_paths(paths):
        """Return *paths* as a single string suitable for one SQL column.

        ``ImgsPipLine`` stores a list of downloaded paths in ``image_path``;
        the list is flattened to a comma-separated string. Strings pass
        through unchanged and empty/``None`` values become ``''``.
        """
        if not paths:
            return ''
        if isinstance(paths, str):
            return paths
        return ','.join(paths)

    def _save_cipai(self, item):
        """Insert one ``poem_cipai`` row per (title, url) pair of *item*."""
        # Titles and urls are parallel lists; pair them positionally.
        rows = list(zip(item['title'], item['url']))
        self.cursor.executemany(self._CIPAI_SQL, rows)
        self.conn.commit()

    def _save_poetry(self, item):
        """Insert *item* into ``poem``, defaulting optional fields to ''."""
        # 'image_path' is defaulted on its own absence (not on 'images'), so
        # an item whose images all failed to download no longer raises.
        self._fill_missing(
            item, ('url', 'annotation', 'tag', 'relation', 'image_path'))
        self.cursor.execute(self._POEM_SQL, (
            item['url'], item['title'], item['dynasty'], item['author'],
            item['content'], item['annotation'], item['tag'],
            item['relation'], self._join_image_paths(item['image_path']),
        ))
        self.conn.commit()

    def _save_author(self, item):
        """Insert *item* into ``poem_author``; MySQL errors are only printed."""
        self._fill_missing(
            item, ('magnum_opus', 'introduction', 'dynasty', 'image_path'))
        params = (
            item['author'], item['dynasty'], item['introduction'],
            item['magnum_opus'], item['url'],
            self._join_image_paths(item['image_path']),
        )
        try:
            self.cursor.execute(self._AUTHOR_SQL, params)
            self.conn.commit()
        except pymysql.Error as e:
            print("error", e)

    def _save_book_cate(self, item):
        """Store the parallel cate/description lists as ``book_cate`` documents."""
        docs = [
            {"cate": cate, "description": description}
            for cate, description in zip(item['cate'], item['description'])
        ]
        # insert_many rejects an empty document list, so skip it entirely.
        if docs:
            mdb['book_cate'].insert_many(docs)

    def _save_book(self, item):
        """Record a book once: write its chapter list file and Mongo document."""
        collection = mdb['book_chapter']
        if collection.find_one({'book_name': item['book_name']}):
            return  # Book already recorded; leave file and document untouched.
        os.makedirs(self._BOOKS_DIR, exist_ok=True)
        with open(self._BOOKS_DIR + item['book_name'] + ".txt", 'w', encoding='utf8') as f:
            f.writelines(item['chapters'])
        collection.insert_one({
            "book_name": item['book_name'],
            "chapters": item['chapters'],
            'content': [],
        })

    def _save_book_content(self, item):
        """Append one chapter to its book's file tree and Mongo document."""
        # The directory is derived from the item itself: no Mongo document
        # exists yet when the first chapter of a book arrives.
        chapter_dir = self._BOOKS_DIR + item['book_name'] + "/"
        os.makedirs(chapter_dir, exist_ok=True)
        with open(chapter_dir + item['chapter'] + ".txt", 'a', encoding='utf8') as f:
            f.writelines(item['content'])
        # $push appends the chapter without rewriting the whole array;
        # upsert creates {'book_name': ..., 'content': [chapter]} for a book
        # that has no document yet.
        mdb['book_chapter'].update_one(
            {'book_name': item['book_name']},
            {"$push": {'content': {item['chapter']: item['content']}}},
            upsert=True,
        )


class ImgsPipLine(ImagesPipeline):
    """Download the images listed on poem and author items.

    Only ``PoetryItem`` and ``AuthorItem`` instances that carry an ``images``
    field trigger downloads; every other item passes through untouched.
    """

    @staticmethod
    def _has_images(item):
        """Return True for a non-empty poem/author item with an ``images`` field."""
        return (
            bool(item)
            and isinstance(item, (PoetryItem, AuthorItem))
            and 'images' in item
        )

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['images']``."""
        if self._has_images(item):
            for url in item['images']:
                yield scrapy.Request(url=url)

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path for a downloaded image.

        Poem/author images are stored under the last segment of their URL.
        Any other call falls back to the stock ``ImagesPipeline`` naming
        instead of implicitly returning ``None``.
        """
        if self._has_images(item):
            # Returning just the image file name is enough.
            return request.url.split('/')[-1]
        return super().file_path(request, response=response, info=info, item=item)

    def item_completed(self, results, item, info):
        """Record successfully downloaded paths on ``item['image_path']``.

        The item is always returned; ``image_path`` is left unset when no
        download succeeded or the item is not a poem/author item.
        """
        if item and isinstance(item, (PoetryItem, AuthorItem)):
            image_paths = [x['path'] for ok, x in results if ok]
            if image_paths:
                item['image_path'] = image_paths
        return item
